%load_ext autoreload
%autoreload 2
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision.datasets import ImageFolder
from torchvision.transforms import ToTensor, Compose, Resize, TrivialAugmentWide, Normalize
from torch.nn import CrossEntropyLoss, Softmax
import torchvision.models as models
import glob
import os
import PIL
import ipyplot
import tqdm
import cv2
import numpy as np
import random
import opendatasets as od
from dataclasses import dataclass
import pandas as pd
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)
cuda
od.download("https://www.kaggle.com/competitions/plant-seedlings-classification/data")
Skipping, found downloaded files in ".\plant-seedlings-classification" (use force=True to force download)
image_url = './plant-seedlings-classification/train/*/*.*'
image_list = []
category = []
import matplotlib.pyplot as plt
# The class label is the image's parent directory name. Use os.path instead of
# splitting on '\\' (the original only worked with Windows path separators).
for f in glob.glob(image_url):
    image_list.append(f)
    category.append(os.path.basename(os.path.dirname(f)))
ipyplot.plot_class_representations( image_list, category,img_width=150, force_b64=True,show_url=False )
def mean_std_images(image_url: str, sample: int) -> tuple:
    """Estimate per-channel pixel mean and std (scaled to [0, 1]) from a random
    sample of images matched by ``image_url``.

    Args:
        image_url: glob pattern selecting image files.
        sample: number of images to randomly sample (clamped to the number of
            matching files — ``random.sample`` would otherwise raise ValueError).

    Returns:
        (means, stds): two length-3 float arrays, divided by 255 so they can be
        fed to torchvision ``Normalize`` on ToTensor-scaled inputs.

    Raises:
        ValueError: if no matching file could be read.
    """
    means = np.array([0, 0, 0], dtype=np.float32)
    stds = np.array([0, 0, 0], dtype=np.float32)
    total_images = 0
    all_files = glob.glob(image_url, recursive=True)
    sample_size = min(sample, len(all_files))  # never ask for more than exist
    for f in tqdm.tqdm(random.sample(all_files, sample_size)):
        img = cv2.imread(f)
        if img is None:
            # cv2.imread returns None for unreadable/corrupt files; the
            # original crashed with AttributeError on img.mean — skip instead.
            continue
        # NOTE(review): cv2 loads channels in BGR order, while ImageFolder/PIL
        # yields RGB — these stats are in BGR order; confirm whether Normalize
        # should receive the channels reversed.
        means += img.mean(axis=(0, 1))
        stds += img.std(axis=(0, 1))
        total_images += 1
    if total_images == 0:
        raise ValueError('no readable images matched the pattern')
    means = means / (total_images * 255.)
    stds = stds / (total_images * 255.)
    return means, stds
@dataclass
class PreprocessConfiguration:
    # Configuration for building the seedling-classification DataLoaders.
    batch_size: int = 32
    # Images are resized to (resize, resize) before augmentation/normalization.
    resize: int = 224
    # Fraction of the ImageFolder dataset used for training; remainder is the test split.
    train_size: float = 0.8
    # Glob pattern used only for sampling images to estimate Normalize mean/std.
    image_url_for_std: str = './plant-seedlings-classification/train/*/*.*'
    # Root directory consumed by torchvision ImageFolder (one subfolder per class).
    image_url_for_train: str = './plant-seedlings-classification/train/'
    # Evaluated once at class-definition time; may be None on exotic platforms.
    num_workers: int = os.cpu_count()
    # When True: no augmentation, no train/test split — a single loader is returned.
    prediction_data: bool = False
def preprocess_image_folder_data(preprocessing_configuration=PreprocessConfiguration()):
    """Build DataLoaders for the plant-seedlings ImageFolder dataset.

    Returns ``(train_loader, valid_loader)`` normally, or a single full-dataset
    loader (no augmentation, no shuffling) when
    ``preprocessing_configuration.prediction_data`` is True.

    NOTE: the default config instance is created once at definition time and
    shared across calls; it is only read here, so that is safe.
    """
    print('Step 1: Preprocessing Image')
    print('Step 1.1: Randomly calculating mean and standard for Train Transform normalize')
    # Per-channel normalization stats estimated from a random sample of 3000 images.
    mean, std = mean_std_images(preprocessing_configuration.image_url_for_std, 3000)
    resize = (preprocessing_configuration.resize, preprocessing_configuration.resize)
    if preprocessing_configuration.prediction_data:
        # Prediction/inference: deterministic pipeline, no augmentation.
        train_transform = Compose([Resize(resize),
                                   ToTensor(),
                                   Normalize(mean=mean, std=std)])
    else:
        # Training: add TrivialAugmentWide for regularization.
        train_transform = Compose([Resize(resize),
                                   TrivialAugmentWide(num_magnitude_bins=31),
                                   ToTensor(),
                                   Normalize(mean=mean, std=std)])
    print('Step 1.2: Loading Image from folders')
    full_train_dataset = ImageFolder(
        root=preprocessing_configuration.image_url_for_train,
        transform=train_transform
    )
    # BUG FIX: BATCH_SIZE must be defined before the prediction branch below —
    # the original assigned it afterwards, so prediction_data=True raised NameError.
    BATCH_SIZE = preprocessing_configuration.batch_size
    if preprocessing_configuration.prediction_data:
        valid_loader = DataLoader(
            full_train_dataset, batch_size=BATCH_SIZE, shuffle=False,
            num_workers=preprocessing_configuration.num_workers, pin_memory=True,
        )
        return valid_loader
    train_size = int(preprocessing_configuration.train_size * len(full_train_dataset))
    test_size = len(full_train_dataset) - train_size
    print('Step 1.3: Train/Test Split Datasets')
    train_data, test_data = torch.utils.data.random_split(full_train_dataset, [train_size, test_size])
    train_loader = DataLoader(
        train_data, batch_size=BATCH_SIZE, shuffle=True,
        num_workers=preprocessing_configuration.num_workers, pin_memory=True
    )
    valid_loader = DataLoader(
        test_data, batch_size=BATCH_SIZE, shuffle=False,
        num_workers=preprocessing_configuration.num_workers, pin_memory=True,
    )
    return train_loader, valid_loader
class ResNet(nn.Module):
    """ResNet-101 feature extractor (avgpool + fc head removed) followed by a
    single linear layer mapping the flattened features to 12 seedling classes."""

    def __init__(self):
        super().__init__()
        # 'pretrained=True' has been deprecated since torchvision 0.13 (see the
        # warning this code emitted); the weights enum below is documented as
        # equivalent to the old behavior.
        backbone = models.resnet101(weights=models.ResNet101_Weights.IMAGENET1K_V1)
        # Drop the last two children (adaptive avgpool and the 1000-way fc).
        self.resnet = nn.Sequential(*list(backbone.children())[:-2])
        # 100352 = 2048 channels * 7 * 7 spatial map for 224x224 inputs.
        self.Linear = nn.Linear(in_features=100352, out_features=12)

    def forward(self, X):
        X = self.resnet(X)
        X = X.view(X.shape[0], -1)  # flatten per-sample feature map
        return self.Linear(X)
class TrainingConfiguration:
    # Training hyper-parameters. NOTE: this is a plain class (not a dataclass),
    # so these annotated assignments are class attributes; the loss and the
    # model below are instantiated ONCE at class-definition time and shared by
    # every TrainingConfiguration() instance.
    model_name: str = 'resnet_100_epochs'
    epochs: int=100
    learning_rate: float = 0.001
    # 'loss_criteron' spelling kept — callers reference it by this name.
    loss_criteron :nn = nn.CrossEntropyLoss()
    # Moving the model to `device` here triggers the pretrained-weight load at import time.
    model: nn.Module = ResNet().to(device)
    # Optimizer CLASS (not instance); full_training() constructs it with model params.
    optimizer: torch.optim = torch.optim.Adam
c:\Users\yaqoo\anaconda3\lib\site-packages\torchvision\models\_utils.py:208: UserWarning: The parameter 'pretrained' is deprecated since 0.13 and may be removed in the future, please use 'weights' instead. warnings.warn( c:\Users\yaqoo\anaconda3\lib\site-packages\torchvision\models\_utils.py:223: UserWarning: Arguments other than a weight enum or `None` for 'weights' are deprecated since 0.13 and may be removed in the future. The current behavior is equivalent to passing `weights=ResNet101_Weights.IMAGENET1K_V1`. You can also use `weights=ResNet101_Weights.DEFAULT` to get the most up-to-date weights. warnings.warn(msg)
def train_model(model, train_loader, loss_criteron, optimizer):
    """Run one training epoch; return (accuracy, avg_loss) over the dataset.

    NOTE: avg_loss sums per-batch mean losses and divides by the dataset size
    (not the batch count) — kept as-is to match the rest of the pipeline.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    for X, y in train_loader:
        targets = y.to(device)
        logits = model(X.to(device))
        loss = loss_criteron(logits, targets)
        # Bookkeeping before the optimizer step, as in the original flow.
        running_loss += loss.to('cpu').item()
        predictions = torch.softmax(logits, dim=1).argmax(dim=1)
        correct += (predictions == targets).sum().to('cpu').item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    dataset_size = len(train_loader.dataset)
    return correct / dataset_size, running_loss / dataset_size
def val(model, test_loader, loss_criteron):
    """Evaluate the model on test_loader; return (accuracy, avg_loss).

    avg_loss uses the same dataset-size normalization as train_model.
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    with torch.inference_mode():  # no grad tracking during evaluation
        for X, y in test_loader:
            targets = y.to(device)
            logits = model(X.to(device))
            running_loss += loss_criteron(logits, targets).to('cpu').item()
            predictions = torch.softmax(logits, dim=1).argmax(dim=1)
            correct += (predictions == targets).sum().to('cpu').item()
    dataset_size = len(test_loader.dataset)
    return correct / dataset_size, running_loss / dataset_size
def full_training(train_config: TrainingConfiguration = TrainingConfiguration()):
    """Train the configured model, checkpointing the best test accuracy.

    Returns:
        (model, performance): the model with the best checkpoint restored (if
        one was saved) and a DataFrame of per-epoch train/test metrics.
    """
    model = train_config.model
    optimizer = train_config.optimizer(params=model.parameters(), lr=train_config.learning_rate)
    train_loss = []
    train_acc = []
    test_loss = []
    test_acc = []
    current_best = 0
    best_path = './' + train_config.model_name + '_best_model.pth'
    train, test = preprocess_image_folder_data()
    print('Step 2: Training Model')
    for epoch in tqdm.tqdm(range(train_config.epochs)):
        accuracy_train, loss_train = train_model(model, train, train_config.loss_criteron, optimizer)
        accuracy_test, loss_test = val(model, test, train_config.loss_criteron)
        if current_best < accuracy_test:
            current_best = accuracy_test
            torch.save(model.state_dict(), best_path)
        train_loss.append(loss_train)
        train_acc.append(accuracy_train)
        test_loss.append(loss_test)
        test_acc.append(accuracy_test)
        print('Epoch:', epoch + 1, '/', train_config.epochs, '| train_acc:', round(accuracy_train,2), '| train_loss:', round(loss_train,2), ' | test_acc:', round(accuracy_test,2), '| test_loss:', round(loss_test,2) )
    model_performance_dict = {
        'Train_Accuracy': train_acc,
        'Train_Loss': train_loss,
        'Test_Accuracy': test_acc,
        'Test_Loss': test_loss
    }
    performance = pd.DataFrame(model_performance_dict)
    # BUG FIX: the checkpoint holds a state_dict; torch.load() returns an
    # OrderedDict, and the original assigned it to `model`, clobbering the
    # trained module. Load the weights into the model instead, and only if a
    # checkpoint was actually written (accuracy may never have exceeded 0).
    if os.path.exists(best_path):
        model.load_state_dict(torch.load(best_path))
    return model, performance
resnet, resnet_performance = full_training()
Step 1: Preprocessing Image Step 1.1: Randomly calculating mean and standard for Train Transform normalize
100%|██████████| 3000/3000 [00:56<00:00, 53.22it/s]
Step 1.2: Loading Image from folders Step 1.3: Train/Test Split Datasets Step 2: Training Model
2%|▏ | 1/50 [01:38<1:20:33, 98.64s/it]
Epoch: 1 / 50 | train_acc: 0.12 | train_loss: 0.15 | test_acc: 0.15 | test_loss: 0.08
4%|▍ | 2/50 [03:34<1:27:06, 108.88s/it]
Epoch: 2 / 50 | train_acc: 0.27 | train_loss: 0.07 | test_acc: 0.4 | test_loss: 0.06
6%|▌ | 3/50 [05:28<1:26:56, 110.99s/it]
Epoch: 3 / 50 | train_acc: 0.42 | train_loss: 0.05 | test_acc: 0.48 | test_loss: 0.05
8%|▊ | 4/50 [07:06<1:21:20, 106.09s/it]
Epoch: 4 / 50 | train_acc: 0.52 | train_loss: 0.05 | test_acc: 0.53 | test_loss: 0.06
10%|█ | 5/50 [08:54<1:19:54, 106.55s/it]
Epoch: 5 / 50 | train_acc: 0.57 | train_loss: 0.04 | test_acc: 0.61 | test_loss: 0.16
12%|█▏ | 6/50 [10:49<1:20:19, 109.54s/it]
Epoch: 6 / 50 | train_acc: 0.62 | train_loss: 0.04 | test_acc: 0.57 | test_loss: 0.04
14%|█▍ | 7/50 [12:43<1:19:35, 111.06s/it]
Epoch: 7 / 50 | train_acc: 0.68 | train_loss: 0.03 | test_acc: 0.58 | test_loss: 0.04
16%|█▌ | 8/50 [14:39<1:18:46, 112.53s/it]
Epoch: 8 / 50 | train_acc: 0.71 | train_loss: 0.03 | test_acc: 0.69 | test_loss: 0.04
18%|█▊ | 9/50 [16:35<1:17:37, 113.60s/it]
Epoch: 9 / 50 | train_acc: 0.76 | train_loss: 0.02 | test_acc: 0.74 | test_loss: 0.03
20%|██ | 10/50 [18:26<1:15:14, 112.85s/it]
Epoch: 10 / 50 | train_acc: 0.77 | train_loss: 0.02 | test_acc: 0.71 | test_loss: 0.03
22%|██▏ | 11/50 [20:08<1:11:11, 109.53s/it]
Epoch: 11 / 50 | train_acc: 0.79 | train_loss: 0.02 | test_acc: 0.65 | test_loss: 0.07
24%|██▍ | 12/50 [22:04<1:10:32, 111.38s/it]
Epoch: 12 / 50 | train_acc: 0.81 | train_loss: 0.02 | test_acc: 0.72 | test_loss: 0.03
26%|██▌ | 13/50 [23:57<1:08:59, 111.89s/it]
Epoch: 13 / 50 | train_acc: 0.83 | train_loss: 0.02 | test_acc: 0.78 | test_loss: 0.02
28%|██▊ | 14/50 [25:49<1:07:10, 111.95s/it]
Epoch: 14 / 50 | train_acc: 0.83 | train_loss: 0.02 | test_acc: 0.72 | test_loss: 0.03
30%|███ | 15/50 [27:42<1:05:36, 112.46s/it]
Epoch: 15 / 50 | train_acc: 0.84 | train_loss: 0.02 | test_acc: 0.78 | test_loss: 0.02
32%|███▏ | 16/50 [29:34<1:03:36, 112.24s/it]
Epoch: 16 / 50 | train_acc: 0.85 | train_loss: 0.01 | test_acc: 0.74 | test_loss: 0.02
34%|███▍ | 17/50 [31:19<1:00:27, 109.91s/it]
Epoch: 17 / 50 | train_acc: 0.87 | train_loss: 0.01 | test_acc: 0.69 | test_loss: 0.04
36%|███▌ | 18/50 [33:01<57:25, 107.68s/it]
Epoch: 18 / 50 | train_acc: 0.88 | train_loss: 0.01 | test_acc: 0.76 | test_loss: 0.02
38%|███▊ | 19/50 [34:40<54:14, 104.98s/it]
Epoch: 19 / 50 | train_acc: 0.87 | train_loss: 0.01 | test_acc: 0.79 | test_loss: 0.02
40%|████ | 20/50 [36:18<51:25, 102.85s/it]
Epoch: 20 / 50 | train_acc: 0.88 | train_loss: 0.01 | test_acc: 0.83 | test_loss: 0.02
42%|████▏ | 21/50 [37:55<48:56, 101.26s/it]
Epoch: 21 / 50 | train_acc: 0.87 | train_loss: 0.01 | test_acc: 0.82 | test_loss: 0.02
44%|████▍ | 22/50 [39:32<46:39, 99.98s/it]
Epoch: 22 / 50 | train_acc: 0.89 | train_loss: 0.01 | test_acc: 0.8 | test_loss: 0.02
46%|████▌ | 23/50 [41:17<45:38, 101.43s/it]
Epoch: 23 / 50 | train_acc: 0.9 | train_loss: 0.01 | test_acc: 0.83 | test_loss: 0.02
48%|████▊ | 24/50 [42:55<43:32, 100.49s/it]
Epoch: 24 / 50 | train_acc: 0.9 | train_loss: 0.01 | test_acc: 0.77 | test_loss: 0.03
50%|█████ | 25/50 [44:33<41:33, 99.73s/it]
Epoch: 25 / 50 | train_acc: 0.85 | train_loss: 0.01 | test_acc: 0.77 | test_loss: 0.03
52%|█████▏ | 26/50 [46:12<39:44, 99.36s/it]
Epoch: 26 / 50 | train_acc: 0.87 | train_loss: 0.01 | test_acc: 0.77 | test_loss: 0.02
54%|█████▍ | 27/50 [47:50<37:54, 98.89s/it]
Epoch: 27 / 50 | train_acc: 0.88 | train_loss: 0.01 | test_acc: 0.82 | test_loss: 0.02
56%|█████▌ | 28/50 [49:27<36:07, 98.53s/it]
Epoch: 28 / 50 | train_acc: 0.88 | train_loss: 0.01 | test_acc: 0.82 | test_loss: 0.02
58%|█████▊ | 29/50 [51:05<34:22, 98.21s/it]
Epoch: 29 / 50 | train_acc: 0.9 | train_loss: 0.01 | test_acc: 0.8 | test_loss: 0.02
60%|██████ | 30/50 [52:42<32:40, 98.02s/it]
Epoch: 30 / 50 | train_acc: 0.91 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.02
62%|██████▏ | 31/50 [54:20<31:01, 97.99s/it]
Epoch: 31 / 50 | train_acc: 0.91 | train_loss: 0.01 | test_acc: 0.84 | test_loss: 0.02
64%|██████▍ | 32/50 [55:58<29:21, 97.84s/it]
Epoch: 32 / 50 | train_acc: 0.91 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.02
66%|██████▌ | 33/50 [57:41<28:12, 99.53s/it]
Epoch: 33 / 50 | train_acc: 0.92 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.02
68%|██████▊ | 34/50 [59:18<26:20, 98.76s/it]
Epoch: 34 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.84 | test_loss: 0.02
70%|███████ | 35/50 [1:00:56<24:36, 98.46s/it]
Epoch: 35 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.84 | test_loss: 0.02
72%|███████▏ | 36/50 [1:02:33<22:54, 98.17s/it]
Epoch: 36 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.82 | test_loss: 0.02
74%|███████▍ | 37/50 [1:04:11<21:12, 97.88s/it]
Epoch: 37 / 50 | train_acc: 0.91 | train_loss: 0.01 | test_acc: 0.8 | test_loss: 0.02
76%|███████▌ | 38/50 [1:05:53<19:50, 99.17s/it]
Epoch: 38 / 50 | train_acc: 0.91 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.02
78%|███████▊ | 39/50 [1:07:31<18:07, 98.85s/it]
Epoch: 39 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.01
80%|████████ | 40/50 [1:09:04<16:12, 97.22s/it]
Epoch: 40 / 50 | train_acc: 0.92 | train_loss: 0.01 | test_acc: 0.73 | test_loss: 0.05
82%|████████▏ | 41/50 [1:10:40<14:31, 96.82s/it]
Epoch: 41 / 50 | train_acc: 0.87 | train_loss: 0.01 | test_acc: 0.82 | test_loss: 0.02
84%|████████▍ | 42/50 [1:12:23<13:10, 98.76s/it]
Epoch: 42 / 50 | train_acc: 0.9 | train_loss: 0.01 | test_acc: 0.77 | test_loss: 0.04
86%|████████▌ | 43/50 [1:14:05<11:36, 99.51s/it]
Epoch: 43 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.86 | test_loss: 0.01
88%|████████▊ | 44/50 [1:15:41<09:50, 98.45s/it]
Epoch: 44 / 50 | train_acc: 0.92 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.01
90%|█████████ | 45/50 [1:17:14<08:05, 97.01s/it]
Epoch: 45 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.86 | test_loss: 0.01
92%|█████████▏| 46/50 [1:18:49<06:25, 96.27s/it]
Epoch: 46 / 50 | train_acc: 0.94 | train_loss: 0.01 | test_acc: 0.81 | test_loss: 0.03
94%|█████████▍| 47/50 [1:20:23<04:46, 95.59s/it]
Epoch: 47 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.84 | test_loss: 0.02
96%|█████████▌| 48/50 [1:22:00<03:11, 95.99s/it]
Epoch: 48 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.85 | test_loss: 0.02
98%|█████████▊| 49/50 [1:23:42<01:37, 97.86s/it]
Epoch: 49 / 50 | train_acc: 0.93 | train_loss: 0.01 | test_acc: 0.87 | test_loss: 0.01
100%|██████████| 50/50 [1:25:16<00:00, 102.33s/it]
Epoch: 50 / 50 | train_acc: 0.94 | train_loss: 0.01 | test_acc: 0.87 | test_loss: 0.01